{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import tqdm\n",
    "import jsonlines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every agenda event (one JSON object per line) into a list of records.\n",
    "# jsonlines.open decodes the file as UTF-8 instead of the platform's locale\n",
    "# encoding, which matches how the questions file is written at the end.\n",
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/raw_data/agenda/agenda_events.jsonl') as reader:\n",
    "    contents = list(reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# One DataFrame row per loaded agenda record.\n",
    "df = pd.DataFrame(data=contents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                      event start_time  end_time                location  \\\n",
      "0         Breakfast meeting    9:00 AM  10:00 AM           Bluebird Cafe   \n",
      "1  Sales pitch presentation   10:30 AM  11:30 AM          Hilton Chicago   \n",
      "2             Cooking class   11:00 AM   1:00 PM           Sur La Table    \n",
      "3     Lunch with colleagues    1:00 PM   2:00 PM  The Cheesecake Factory   \n",
      "4        Charity fundraiser    2:30 PM   4:30 PM        The Ritz-Carlton   \n",
      "\n",
      "    person        date           id  \n",
      "0      Mia  2022/12/15  agenda-0000  \n",
      "1  William  2022/07/05  agenda-0001  \n",
      "2    Emily  2022/03/04  agenda-0002  \n",
      "3     Adam  2022/12/02  agenda-0003  \n",
      "4    Layla  2022/08/21  agenda-0004  \n"
     ]
    }
   ],
   "source": [
    "# Show the first rows as plain text to check the loaded columns.\n",
    "preview = df.head()\n",
    "print(preview)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1:00:00\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime, timedelta\n",
    "\n",
    "\n",
    "def compute_duration(start_time: str, end_time: str, time_format: str = \"%I:%M %p\") -> str:\n",
    "    \"\"\"Return the elapsed time between two clock times as a string.\n",
    "\n",
    "    Args:\n",
    "        start_time: Clock time at which the event starts, e.g. '9:00 AM'.\n",
    "        end_time: Clock time at which the event ends, e.g. '10:00 AM'.\n",
    "        time_format: strptime format used to parse both inputs.\n",
    "\n",
    "    Returns:\n",
    "        str() of the resulting timedelta, e.g. '1:00:00'. If end_time is\n",
    "        earlier than start_time the event is treated as ending on the next\n",
    "        day, so '11:00 PM' -> '1:00 AM' yields '2:00:00' instead of the\n",
    "        negative form '-1 day, 2:00:00'.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If either input does not match time_format.\n",
    "    \"\"\"\n",
    "    start_time_obj = datetime.strptime(start_time, time_format)\n",
    "    end_time_obj = datetime.strptime(end_time, time_format)\n",
    "\n",
    "    duration = end_time_obj - start_time_obj\n",
    "    # The inputs carry no date, so an end before the start can only mean the\n",
    "    # event ran past midnight; roll the end over to the following day.\n",
    "    if duration < timedelta(0):\n",
    "        duration += timedelta(days=1)\n",
    "\n",
    "    return str(duration)\n",
    "\n",
    "\n",
    "# Sanity check on the first record of the loaded data.\n",
    "start_time = df.iloc[0][\"start_time\"]\n",
    "end_time = df.iloc[0][\"end_time\"]\n",
    "print(compute_duration(start_time, end_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_QUESTIONS = 100  # how many questions are written to the output file\n",
    "SEED = 42  # fixed seed so a re-run regenerates the same questions\n",
    "\n",
    "\n",
    "def build_question(record, question_type):\n",
    "    \"\"\"Turn one agenda record into a (question, answer) pair.\n",
    "\n",
    "    Args:\n",
    "        record: Agenda row supporting record[key] for 'person', 'event',\n",
    "            'date', 'start_time', 'end_time' and 'location'.\n",
    "        question_type: Template selector, an int from 0 to 4.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (question, answer).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If question_type is outside 0-4.\n",
    "    \"\"\"\n",
    "    person = record['person']\n",
    "    event = record['event']\n",
    "    date = record['date']\n",
    "    start_time = record['start_time']\n",
    "    end_time = record['end_time']\n",
    "    location = record['location']\n",
    "\n",
    "    if question_type == 0:\n",
    "        # What did {person} do from {start_time} to {end_time} on {date}?\n",
    "        question = \"What did {} do from {} to {} on {}?\".format(person, start_time, end_time, date)\n",
    "        return question, event\n",
    "    if question_type == 1:\n",
    "        # Where did {event} that {person} attended take place on {date}?\n",
    "        question = \"Where did {} that {} attended take place on {}?\".format(event, person, date)\n",
    "        return question, location\n",
    "    if question_type == 2:\n",
    "        # When did {person} attend {event} on {date}?\n",
    "        question = \"When did {} attend {} on {}?\".format(person, event, date)\n",
    "        return question, start_time\n",
    "    if question_type == 3:\n",
    "        # How long did {person} attend {event} on {date}?\n",
    "        question = \"How long did {} attend {} on {}?\".format(person, event, date)\n",
    "        return question, compute_duration(start_time, end_time)\n",
    "    if question_type == 4:\n",
    "        # Who attended {event} between {start_time} and {end_time} on {date} in {location}?\n",
    "        question = \"Who attended {} between {} and {} on {} in {}?\".format(event, start_time, end_time, date, location)\n",
    "        return question, person\n",
    "    raise ValueError(\"unknown question_type: {}\".format(question_type))\n",
    "\n",
    "\n",
    "# A private generator keeps the run reproducible without reseeding the global one.\n",
    "rng = random.Random(SEED)\n",
    "\n",
    "# Sample distinct rows; capping at the dataset size avoids the ValueError that\n",
    "# random.sample raises when asked for more items than the population holds.\n",
    "random_indices = rng.sample(range(len(df)), min(NUM_QUESTIONS, len(df)))\n",
    "\n",
    "questions = []\n",
    "for question_id, index in enumerate(random_indices):\n",
    "    question, answer = build_question(df.iloc[index], rng.randint(0, 4))\n",
    "    questions.append({\"qid\": \"easy-agenda-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the generated questions, one JSON object per line.\n",
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/questions/easy/agenda-easy.jsonl', mode='w') as writer:\n",
    "    writer.write_all(questions)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
